// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file is based on code available under the Apache license here:
//   https://github.com/apache/incubator-doris/blob/master/gensrc/proto/segment_v2.proto

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
// Defines file format structures, such as the data header and index header.

syntax = "proto2";

package starrocks;
option java_package = "com.starrocks.proto";

import "olap_common.proto";
import "types.proto";

// A single key/value metadata entry.
// Referenced by SegmentFooterPB.file_meta_datas, which is marked deprecated.
message MetadataPairPB {
    // metadata key
    optional string key = 1;
    // metadata value, stored as raw bytes
    optional bytes value = 2;
}

// Value encodings. Referenced by the `encoding` fields of ColumnMetaPB,
// DictPageFooterPB and IndexedColumnMetaPB.
enum EncodingTypePB {
    // first value, so it is the proto2 default when an `encoding` field is unset
    UNKNOWN_ENCODING = 0;
    DEFAULT_ENCODING = 1;
    PLAIN_ENCODING = 2;
    PREFIX_ENCODING = 3;
    RLE = 4; // NOTE(review): presumably Run-Length Encoding -- confirm against the encoder
    DICT_ENCODING = 5; // ColumnMetaPB.dict_page points to the dictionary page when this is used
    BIT_SHUFFLE = 6;
    FOR_ENCODING = 7; // Frame-Of-Reference
}

// Page kinds. PageFooterPB.type holds one of these values to indicate which
// of its *_footer fields is set.
enum PageTypePB {
    // first value, so it is the proto2 default when PageFooterPB.type is unset
    UNKNOWN_PAGE_TYPE = 0;
    DATA_PAGE = 1;       // PageFooterPB.data_page_footer is present
    INDEX_PAGE = 2;      // PageFooterPB.index_page_footer is present
    DICTIONARY_PAGE = 3; // PageFooterPB.dict_page_footer is present
    SHORT_KEY_PAGE = 4;  // PageFooterPB.short_key_page_footer is present
}

// Encoding options referenced by DataPageFooterPB.null_encoding.
// NOTE(review): presumably the encoding applied to a data page's null map --
// confirm against the page reader/writer.
enum NullEncodingPB {
    // first value, so it is the proto2 default when null_encoding is unset
    BITSHUFFLE_NULL = 0;
    LZ4_NULL = 1;
    RLE_NULL = 2;
}

// Footer of a data page. Carried in PageFooterPB.data_page_footer, which is
// present only when the page type is DATA_PAGE.
message DataPageFooterPB {
    // required: ordinal of the first value
    optional uint64 first_ordinal = 1;
    // required: number of values, including NULLs
    optional uint64 num_values = 2;
    // required: size of the null map, 0 if the page doesn't contain NULL
    optional uint32 nullmap_size = 3;
    // An array column is made up of an offset column and an element column.
    // Every ordinal in the offset column corresponds to an ordinal in the element column.
    // If there is an array column like [1, 2, 3], [4, 5, 6],
    // the ordinal 2 in the offset column corresponds to ordinal 4 in the element column.
    // NOTE(review): the example above appears to count ordinals from 1 -- confirm.
    //
    // Ordinal in the element column, only for array columns: the largest array item ordinal + 1,
    // used to calculate the length of the last array in this page.
    optional uint64 corresponding_element_ordinal = 4;
    // Possible values: 1, 2.
    // If format_version is 1, no value will be stored in this page for NULL records;
    // if format_version is 2, a default value will be stored for each NULL record.
    // Another difference is that format 1 uses Run-Length encoding to encode the null map,
    // while format 2 uses bitshuffle.
    optional uint32 format_version = 20;
    // NOTE(review): presumably selects how the null map is encoded; how this interacts
    // with format_version is not visible in this file -- confirm against the page reader/writer.
    optional NullEncodingPB null_encoding = 21;
}

// Footer of an index page. Carried in PageFooterPB.index_page_footer, which is
// present only when the page type is INDEX_PAGE.
message IndexPageFooterPB {
    // required: number of index entries in this page
    optional uint32 num_entries = 1;

    // Kind of index page.
    enum Type {
        // first value, so it is the proto2 default when `type` is unset
        UNKNOWN_INDEX_PAGE_TYPE = 0;
        LEAF = 1;
        INTERNAL = 2;
    }
    // required: type of the index page
    optional Type type = 2;
}

// Footer of a dictionary page. Carried in PageFooterPB.dict_page_footer, which is
// present only when the page type is DICTIONARY_PAGE.
message DictPageFooterPB {
    // required: encoding for dictionary
    optional EncodingTypePB encoding = 1;
}

// Footer of a short key index page. Carried in PageFooterPB.short_key_page_footer,
// which is present only when the page type is SHORT_KEY_PAGE.
message ShortKeyFooterPB {
    // Number of index items in this index.
    optional uint32 num_items = 1;
    // The total bytes occupied by the index keys.
    optional uint32 key_bytes = 2;
    // The total bytes occupied by the key offsets.
    optional uint32 offset_bytes = 3;
    // ID of the segment this index belongs to.
    optional uint32 segment_id = 4;
    // Number of rows in each block.
    optional uint32 num_rows_per_block = 5;
    // Number of rows in this segment.
    optional uint32 num_segment_rows = 6;
}

// Common page footer. `type` identifies the page kind and exactly one of the
// type-specific *_footer sub-messages is expected to accompany it.
message PageFooterPB {
    // required: indicates which of the *_footer fields is set
    optional PageTypePB type = 1;
    // required: page body size before compression (excluding footer and crc).
    // The page body is stored uncompressed when this value equals the actual page body size.
    optional uint32 uncompressed_size = 2;
    // NOTE(review): field numbers 3-6 are not assigned in this message; check the
    // schema history before reusing them.
    // present only when type == DATA_PAGE
    optional DataPageFooterPB data_page_footer = 7;
    // present only when type == INDEX_PAGE
    optional IndexPageFooterPB index_page_footer = 8;
    // present only when type == DICTIONARY_PAGE
    optional DictPageFooterPB dict_page_footer = 9;
    // present only when type == SHORT_KEY_PAGE
    optional ShortKeyFooterPB short_key_page_footer = 10;
}

// Min/max summary of a zone of values. Used as the segment-level zone map in
// ZoneMapIndexPB.segment_zone_map.
message ZoneMapPB {
    // minimum not-null value, invalid when all values are null (has_not_null == false)
    optional bytes min = 1;
    // maximum not-null value, invalid when all values are null (has_not_null == false)
    optional bytes max = 2;
    // whether the zone has any null value
    optional bool has_null = 3;
    // whether the zone has any not-null value
    optional bool has_not_null = 4;
}

// Metadata for a JSON type column. Carried in ColumnMetaPB.json_meta.
message JsonMetaPB {
    // Format version.
    // Version 1: each JSON datum is encoded individually (the so-called row-oriented format).
    // Version 2 (WIP): columnar encoding for JSON.
    optional uint32 format_version = 1;
}

// Metadata of one column. SegmentFooterPB.columns holds one entry per column,
// and a column may nest further ColumnMetaPB entries via children_columns.
message ColumnMetaPB {
    // column id in table schema
    optional uint32 column_id = 1;
    // unique column id
    optional uint32 unique_id = 2;
    // this field holds a FieldType value, stored as a plain int32
    optional int32 type = 3;
    // variable length for string type
    optional int32 length = 4;
    // encoding of this column
    optional EncodingTypePB encoding = 5;
    // compression type for this column
    // (CompressionTypePB is declared outside this file, presumably in one of the imports)
    optional CompressionTypePB compression = 6;
    // whether this column is nullable
    optional bool is_nullable = 7;
    // metadata about all the column indexes
    repeated ColumnIndexMetaPB indexes = 8;
    // pointer to dictionary page when using DICT_ENCODING
    // (PagePointerPB is declared outside this file, presumably in one of the imports)
    optional PagePointerPB dict_page = 9;
    // NOTE(review): presumably the sub-columns of array/struct/map columns -- confirm.
    repeated ColumnMetaPB children_columns = 10;
    // required by array/struct/map reader to create child reader.
    optional uint64 num_rows = 11;
    // whether all data pages are encoded by dict encoding.
    optional bool all_dict_encoded = 30;
    // used to calculate reader chunk size in vertical compaction
    optional uint64 total_mem_footprint = 31;
    // for json column only
    optional JsonMetaPB json_meta = 32;
    // for json flat column only
    optional bytes name = 33;
}

// Footer of a segment file: file version, per-column metadata, row count and
// the pointer to the short key index page. Several fields are kept only as
// deprecated placeholders (see the per-field comments).
message SegmentFooterPB {
    optional uint32 version = 1 [default = 1]; // file version
    repeated ColumnMetaPB columns = 2;         // tablet schema
    optional uint32 num_rows = 3;              // number of values
    optional uint64 index_footprint = 4;       // Deprecated
    optional uint64 data_footprint = 5;        // Deprecated
    optional uint64 raw_data_footprint = 6;    // Deprecated

    // The two fields below are deprecated; compress_type still declares LZ4_FRAME as its default.
    optional CompressionTypePB compress_type = 7 [default = LZ4_FRAME]; // Deprecated
    repeated MetadataPairPB file_meta_datas = 8;                        // Deprecated

    // Pointer to the short key index's page
    optional PagePointerPB short_key_index_page = 9;
}

// Locates the root of a B-tree style index. Used by IndexedColumnMetaPB
// (ordinal_index_meta / value_index_meta) and OrdinalIndexPB.root_page.
message BTreeMetaPB {
    // required: pointer to either the root index page or the sole data page, depending on is_root_data_page
    optional PagePointerPB root_page = 1;
    // required: true if we only have one data page, in which case root_page points to that page directly
    optional bool is_root_data_page = 2;
}

// Metadata of an indexed column. The index messages in this file use it to
// describe their stored parts (ZoneMapIndexPB.page_zone_maps,
// BitmapIndexPB.dict_column / bitmap_column, BloomFilterIndexPB.bloom_filter).
message IndexedColumnMetaPB {
    // required: FieldType value, stored as a plain int32
    optional int32 data_type = 1;
    // required: encoding for this column
    optional EncodingTypePB encoding = 2;
    // required: total number of values in this column
    optional int64 num_values = 3;
    // present iff this column has ordinal index
    optional BTreeMetaPB ordinal_index_meta = 4;
    // present iff this column contains sorted values and has value index
    optional BTreeMetaPB value_index_meta = 5;
    // compression type for data and index pages; defaults to NO_COMPRESSION when unset
    optional CompressionTypePB compression = 6 [default = NO_COMPRESSION];
    // index size
    // NOTE(review): unit is presumably bytes -- confirm against the writer.
    optional uint64 size = 7;
}

// -------------------------------------------------------------
// Column Index Metadata
// -------------------------------------------------------------

// Kinds of column index. Each non-unknown value has a matching sub-message
// field in ColumnIndexMetaPB.
enum ColumnIndexTypePB {
    // first value, so it is the proto2 default when ColumnIndexMetaPB.type is unset
    UNKNOWN_INDEX_TYPE = 0;
    ORDINAL_INDEX = 1;      // see ColumnIndexMetaPB.ordinal_index
    ZONE_MAP_INDEX = 2;     // see ColumnIndexMetaPB.zone_map_index
    BITMAP_INDEX = 3;       // see ColumnIndexMetaPB.bitmap_index
    BLOOM_FILTER_INDEX = 4; // see ColumnIndexMetaPB.bloom_filter_index
}

// Metadata of one index on a column. ColumnMetaPB.indexes holds a list of these.
// NOTE(review): presumably `type` tells which one of the index sub-messages below
// is populated (the same pattern PageFooterPB documents) -- confirm against the reader.
message ColumnIndexMetaPB {
    // kind of index described by this entry
    optional ColumnIndexTypePB type = 1;
    // NOTE(review): field numbers 2-6 are not assigned in this message; check the
    // schema history before reusing them.
    optional OrdinalIndexPB ordinal_index = 7;
    optional ZoneMapIndexPB zone_map_index = 8;
    optional BitmapIndexPB bitmap_index = 9;
    optional BloomFilterIndexPB bloom_filter_index = 10;
}

// Metadata of an ordinal index. Carried in ColumnIndexMetaPB.ordinal_index.
message OrdinalIndexPB {
    // required: the root page can be the data page if there is only one data page,
    // or the only index page if there is more than one data page.
    optional BTreeMetaPB root_page = 1;
}

// Metadata of a zone map index. Carried in ColumnIndexMetaPB.zone_map_index.
message ZoneMapIndexPB {
    // required: segment-level zone map
    optional ZoneMapPB segment_zone_map = 1;
    // required: the zone map of each data page is stored in an IndexedColumn with ordinal index
    optional IndexedColumnMetaPB page_zone_maps = 2;
}

// Metadata of a bitmap index. Carried in ColumnIndexMetaPB.bitmap_index.
// The index consists of an ordered dictionary part (dict_column) and a
// bitmaps part (bitmap_column).
message BitmapIndexPB {
    // Bitmap implementation used by the index.
    enum BitmapType {
        UNKNOWN_BITMAP_TYPE = 0;
        ROARING_BITMAP = 1;
    }
    // bitmap implementation; defaults to ROARING_BITMAP when unset
    optional BitmapType bitmap_type = 1 [default = ROARING_BITMAP];
    // required: whether the index contains null key.
    // if true, the last bitmap (ordinal:dict_column.num_values) in bitmap_column is
    // the bitmap for null key. we don't store null key in dict_column.
    optional bool has_null = 2;
    // required: meta for ordered dictionary part
    optional IndexedColumnMetaPB dict_column = 3;
    // required: meta for bitmaps part
    optional IndexedColumnMetaPB bitmap_column = 4;
}

// Hash strategies referenced by BloomFilterIndexPB.hash_strategy.
// Only one strategy is declared here.
enum HashStrategyPB {
    // first (and only) value, so it is the proto2 default when hash_strategy is unset
    // NOTE(review): presumably the 64-bit MurmurHash3 x64 variant -- confirm against the hash implementation.
    HASH_MURMUR3_X64_64 = 0;
}

// Bloom filter algorithms referenced by BloomFilterIndexPB.algorithm.
enum BloomFilterAlgorithmPB {
    // first value, so it is the proto2 default when `algorithm` is unset
    BLOCK_BLOOM_FILTER = 0;
    CLASSIC_BLOOM_FILTER = 1;
}

// Metadata of a bloom filter index. Carried in ColumnIndexMetaPB.bloom_filter_index.
message BloomFilterIndexPB {
    // required: hash strategy used by the bloom filter
    optional HashStrategyPB hash_strategy = 1;
    // bloom filter algorithm; BLOCK_BLOOM_FILTER (value 0) is the proto2 default when unset
    optional BloomFilterAlgorithmPB algorithm = 2;
    // required: meta for bloom filters
    optional IndexedColumnMetaPB bloom_filter = 3;
}
